
* original do file 11b_SumStats


* ---------------------------------------------------------------------
* Session setup: clear memory and declare the directory globals used
* throughout this do-file. Every directory is derived from the global
* main, so only that one line needs editing when the repository moves.
* ---------------------------------------------------------------------
clear all
global main "/Users/inga/Dropbox/Ais project/Repository"
cd "$main"

global data "$main/data_intermediate"
global output "$data/output514"
global soutput "$data/server_output"
global figures "$main/figures"
global input "$main/data_input"
* BACI sits inside the input directory: derive it rather than repeating the absolute path
global baci "$input/BACI"
global tables "$main/tables"

* NOTE(review): the world-map section of this file reads "$SpaceMaps/wlddata"
* and "$SpaceMaps/wldcoor.dta", but no global SpaceMaps is defined here.
* It has to be set by the caller (or profile.do) before that section runs.
* Because $main contains a space, every path built from these globals must be
* wrapped in double quotes when it is used.


**********************************************************************
******************* network figures **********************************
**********************************************************************



** Figure with shortest routes from US

* Step 1: for every DEP-ARR pair keep the most frequently observed combination
* of the *acid variables, then reshape to one row per hop.
use "$data/fullhrs12", clear
*drop if D1_a==D2_a 
bysort *acid: generate N = _N
egen maxN = max(N), by(DEP ARR)
keep if N == maxN
* tt is plotted elsewhere in this file as "travel time in hours"
generate tt = A_time - D0_t
collapse (mean) tt, by(*acid DEP ARR)
bysort DEP ARR: generate n = _n
compress
reshape long D@_acid, i(DEP ARR n tt) j(hop)

* Step 2: drop rows that repeat the previous port; the arrival port of a leg
* is the departure port found on the next row of the same route.
sort DEP ARR n hop
bysort DEP ARR n (hop): drop if D_acid == D_acid[_n-1] & D_acid[_n-1] != .

bysort DEP ARR n (hop): generate A_acid = D_acid[_n+1]
drop if A_acid == .

* Step 3: the lookup file is keyed on A_acid/D_acid. Park the leg ids in DD/AA
* and let the route end points (DEP/ARR) borrow the key names for the first merge.
rename D_acid DD
rename A_acid AA
rename DEP D_acid
rename ARR A_acid

merge m:1 A_acid D_acid using "$data/port_to_port_sample_small", keepusing(*country *name) keep(master match) nogenerate

* end-point attributes get the ARR_/DEP_ prefix
rename A_country ARR_country
rename D_country DEP_country
rename A_port ARR_port_name
rename D_port DEP_port_name

* restore the original names and merge again, this time for the individual leg
rename D_acid DEP
rename A_acid ARR
rename DD D_acid
rename AA A_acid

merge m:1 A_acid D_acid using "$data/port_to_port_sample_small", keepusing(*country *name) keep(master match) nogenerate

* Step 4: per pair of end-point countries keep only the route(s) with minimal tt
egen mintt = min(tt), by(ARR_country DEP_country)
keep if tt == mintt

save "$data/nwdetail", replace

collapse (first) hop, by(*country tt n)

save "$data/nwbase", replace


* Step 5: network plot of the legs on routes departing from the US
use "$data/nwbase", clear
duplicates drop *country, force

keep if DEP_country == "US"
nwset D_country A_country, directed edgelist name(pt)
* first nwplot call is made with generate(xco yco); presumably it stores node coordinates -- confirm in nwcommands help
nwplot, generate(xco yco)     

merge 1:1 _nodelab using "$data/ccoord", keep(master match) nogenerate

nwplot, arrowfactor(.5) edgefactor(.1)  nodefactor(.01)  layout(nodexy) nodexy(A_lon A_lat) label(_nodelab) labelopt(mlabsize(1.5)) scale(.9)
graph export "$figures/network_us_dep.png", replace






/*


u $data/fullhrs12, clear
bysort *acid: g N=_N
collapse (count) D0_a, by(N) fast
egen T=total(D0)
sort N
g Share = D0_a/T
g CumShare = sum(Share)

keep if N<13
save help, replace


u $data/fullhrs12, clear
*drop direct routes
drop if D1_a==D2_a
bysort *acid: g N=_N
collapse (count) D0_a, by(N) fast
egen T=total(D0)
sort N
g Share = D0_a/T
g CumShareID = sum(Share)
drop Share T

merge 1:1 N using help
keep if _merge==3
drop _merge

twoway line CumShare CumShareID N if N<11, xlabel(1(1)10) graphregion(color(white)) ytitle("Cum. share in total number of fastest paths") xtitle("routes occuring # times") legend(label(1 "all routes") label(2 "indirect routes") )
graph export $figures/SS_Cums514.pdf, replace
erase help.dta


u $data/fullhrs12, clear
*bysort *acid: g N=_N
bysort *acid: g nn=_n
g nnn=nn if nn==1

collapse (count) D0_t (sum) nnn, by(D0_a D16_a) 
g s=nnn/D0_t

summ s

hist s






** make a table with routes through CHN to complement the response to R1 regarding the network figure
u $data/nwdetail, clear
keep if DEP_c=="US"
g keepme=1 if A_count=="CN"

egen Keepme=total(keepme), by(DEP ARR)

keep if Keepme>0
sort DEP_por ARR_port hop



** and compare to direct US -> CN routes


u $data/port_to_port_ship_bal_clus_small, clear
keep if Restr==1
g travel_hs=hours(A_date-D_date)
drop if travel_hs==0
g A_day=dofc(A_date)-20453


g byte post =0
replace post=1 if A_day>184
keep if post==0

keep if D_c=="US" & A_c =="CN"

collapse (mean) travel_hs, by(A_port_name D_port_name)

*/

***************************************************************************************
***************************** Fig: tt vs distance **************************************
***************************************************************************************

* Port-pair distances from the coordinates, saved to a scratch file that is
* erased again at the end of this section.
use "$data/port_to_port_sample_small", clear
geodist A_lat A_lon D_lat D_lon, generate(distance)
* pairs for which geodist returned the extended missing value .a are set to 20000
replace distance = 20000 if distance == .a
save "$data/port_to_port_sample_small_dist", replace


* Travel time per observed route; tt_dir is filled only where D1_acid equals D2_acid
* (labelled "direct routes" in the legend below).
use "$data/fullhrs12", clear
generate tt = A_time - D0_t
drop A_time D0_t
generate tt_dir = tt if D1_acid == D2_acid
keep D0_acid D16_acid tt*
rename D0_acid D_acid
rename D16_acid A_acid

merge m:1 A_acid D_acid using "$data/port_to_port_sample_small_dist", keepusing(distance) keep(master match) nogenerate


* one row per port pair: median travel times and the pair's distance
collapse (median) tt tt_dir (first) distance, by(*acid)

* plots 3 and 4 carry if-conditions on negative values and are the ones shown
* in the legend (with their own marker size); plots 1 and 2 draw the small markers
twoway (scatter  tt  distance  if tt<4000, msize(.3) mcolor(dknavy) ) ///
       (scatter  tt_dir  distance, msize(.3) mcolor(ltblue)) ///
       (scatter  tt  distance  if tt<0 , mcolor(dknavy)) ///
       (scatter  tt_dir  distance if tt_dir<0, mcolor(ltblue)), ///
       ytitle("travel time in hours") xtitle(distance in km) scale(.7) ///
       graphregion(color(white)) ///
       legend(order(3 4) label(3 "fastest indirect routes") label(4 "direct routes") forcesize symysize(2) symxsize(2))
graph export "$figures/traveltime_distance.png", replace



erase "$data/port_to_port_sample_small_dist.dta"


************************************************************************************
********************* Fig: distribution of hops across cts *********************
************************************************************************************

/*

u "$data/nwdetail", clear

collapse (max) hop, by(DEP ARR n ARR_country DEP_country) 

collapse (min) hop, by(*country)

histogram hop, graphregion(color(white)) xlabel(0(1)12) discrete lcolor(ltblue) fcolor(ltblue%60) start(0) xtitle("# hops on fastest route")
graph export "$figures/hops_distribution.png", replace

summ hop, detail

*/

/*

rename DEP D_country
rename ARR A_country
merge m:1 D_country using $data/iso2tobaci, keepusing(iso_i)
tab D_country if _merge==1
drop if _merge==2
drop _merge
rename iso_i iso_i2
rename D_country D_country2
rename A_c D_country
merge m:1 D_country using $data/iso2tobaci, keepusing(iso_i)
drop if _merge==2
drop _merge
rename iso_i iso_j
rename iso_i iso_i
rename D_country A_country
rename D_country D_country

collapse (min) hop, by(iso*)

save $data/hops_countrylevel_baci, replace

*/


*********************************************************************************************************
********************* how many intermediate countries do we have? ***************************************
*********************************************************************************************************

* Same route preparation as for the network figure: most frequent *acid
* combination per DEP-ARR pair, reshaped to one row per leg.
use "$data/fullhrs12", clear
bysort *acid: generate N = _N
egen maxN = max(N), by(DEP ARR)
keep if N == maxN
generate tt = A_time - D0_t
collapse (mean) tt, by(*acid DEP ARR)
bysort DEP ARR: generate n = _n
compress
reshape long D@_acid, i(DEP ARR n tt) j(hop)

sort DEP ARR n hop
bysort DEP ARR n (hop): drop if D_acid == D_acid[_n-1] & D_acid[_n-1] != .
bysort DEP ARR n (hop): generate A_acid = D_acid[_n+1]
drop if A_acid == .

* first merge: attributes of the route end points (DEP/ARR temporarily carry the key names)
rename D_acid DD
rename A_acid AA
rename DEP D_acid
rename ARR A_acid

merge m:1 A_acid D_acid using "$data/port_to_port_sample_small", keepusing(*country *name) keep(master match) nogenerate

rename A_country ARR_country
rename D_country DEP_country
rename A_port ARR_port_name
rename D_port DEP_port_name

* second merge: attributes of the individual leg
rename D_acid DEP
rename A_acid ARR
rename DD D_acid
rename AA A_acid

merge m:1 A_acid D_acid using "$data/port_to_port_sample_small", keepusing(*country *name) keep(master match) nogenerate


collapse (mean) tt, by(DEP ARR *country n) // collapse over intermediate ports in the same country
* routes that start and end in the same country
drop if ARR_country == DEP_country
* legs arriving in the route's departure country
drop if A_country == DEP_country //drop hop in the same country
* legs departing from the route's arrival country
drop if D_country == ARR_country //drop hop in the same country
* legs within one country
drop if D_country == A_country



* N = number of remaining legs on each route
bysort DEP ARR n: generate N = _N
count if N > 4
display r(N)/_N

* smallest leg count per country pair; minus one gives the intermediate countries
collapse (min) N, by(DEP_country ARR_country)

replace N = N - 1

histogram N, graphregion(color(white)) xlabel(0(1)10) discrete lcolor(ltblue) fcolor(ltblue%60) start(0) xtitle("min # hops (intermediate countries) on fastest route")
graph export "$figures/hops_distribution.png", replace

count if N > 2
display r(N)/_N

summarize N, detail






**************************************************************************************************
********************* Sum stats: share of trade traveling on indirect routes *********************
**************************************************************************************************

* 2016 bilateral totals from the quarterly file
u "$data/all_q", clear
keep if year==2016
collapse (sum) value, by(iso_i iso_j)
save "$data/all_y_2016", replace



* Country pairs with at least one observed port-to-port connection
* (rows flagged ballast_55==1 are excluded).
u "$data/ship_port_to_port_final", clear


keep if ballast_55!=1
collapse (count) ship_id, by(A_port_name D_port_name *country)

* BACI code of the departure country
merge m:1 D_country using "$input/iso2tobaci", keepusing(iso_i)
tab D_country if _merge==1
drop if _merge==2
drop _merge
* park the departure-side results so the same lookup (keyed on D_country)
* can be reused for the arrival country
rename iso_i iso_i2
rename D_country D_country2
rename A_c D_country
merge m:1 D_country using "$input/iso2tobaci", keepusing(iso_i)
drop if _merge==2
drop _merge
rename iso_i iso_j
* restore the exporter code. The original wrote "rename iso_i iso_i", which only
* worked through variable abbreviation (iso_i -> iso_i2) and fails under
* set varabbrev off; the source variable is now named explicitly.
rename iso_i2 iso_i

collapse (count) ship_id, by(iso*)

save "$data/direct_connections_baci", replace


u "$baci/baci2016_bil", clear

*merge 1:1 iso_i iso_j using $data/hops_countrylevel_baci
*drop if _merge==2 /*small islands not found in baci*/
*drop _merge

merge 1:1 iso_i iso_j using "$data/direct_connections_baci"
drop if _merge==2 /*small islands not found in baci*/
drop _merge
* total trade value, and total value on pairs that have a connection
egen tEXP=total(v)
egen EXPconnect=total(v) if ship_id!=. & ship_id!=0
g sEXP=EXPconnect/tEXP
sum sEXP

rename ship_id direct
save "$data/summNetwork", replace


/*


************************************************************************************
************ figure with average ship size and total tonnage ***********************
************************************************************************************


u $data/ship_port_to_port_final, clear

collapse (median) mdwt=dwt mteu=teu (mean) dwt teu dwt_usage_55 (sum) Tdwt=dwt Tteu=teu Tu=dwt_usage_55 (count) N=dwt Nu=dwt_usage_55, by(A_port* A_c* D_p* D_c )

foreach var of varlist dwt teu Tdw Tt m* N dwt_u  Tu {
g ln`var' =log(`var')
}


twoway (scatter  lndwt  lnTu, msize(.5)) (lfit lndwt lnTu), graphregion(color(white)) ytitle("average ship size in log(DWT), by port pair") xtitle("total traffic in log(tonnes), by port pair") legend(off)
graph export $figures/plot_asize_volume.png, replace

twoway (scatter  lnmdwt  lnTu, msize(.5)) (lfit lnmdwt lnTu), graphregion(color(white)) ytitle("median ship size in log(DWT) by port pair") xtitle("total traffic in log(tonnes), by port pair") legend(off)
graph export  $figures/plot_medsize_volume.png, replace


*/



**************************************************************
*************** summary stats on PanExposure *****************
**************************************************************

** make PanExposure for Baci country codes


* All departure x arrival port combinations: the port list is renamed to the
* D-side, then crossed with itself for the A-side.
u "$data/xj2_small", clear
rename A* D*
rename x_j OUT_D
cross using "$data/xj2_small"
rename x_j OUT_A

* the arrival-side weight is replaced by the one stored in IN_A_acid_small
drop OUT_A
merge m:1 A_acid using "$data/IN_A_acid_small"
drop _merge
rename dwt_usage OUT_A /*this is actually IN, but to preserve the code...*/

* merge new exposure

merge 1:1 *acid using "$data/XPanama_byHrPRE514"
keep if _merge==3
drop _merge
* the merge==1 observation are routes to/from ports that have no trips in the pre-period

* merge country codes + iso codes compatible with BACI

merge 1:1 *acid using "$data/port_to_port_sample_small", keepusing(*country)
keep if _merge==3
drop _merge 

merge m:1 D_country using "$input/iso2tobaci", keepusing(iso_i)
tab D_country if _merge==1
drop if _merge==2
drop _merge
* park the departure-side results so the lookup (keyed on D_country) can be
* reused for the arrival country
rename iso_i iso_i2
rename D_country D_country2
rename A_c D_country
merge m:1 D_country using "$input/iso2tobaci", keepusing(iso_i)
drop if _merge==2
drop _merge
rename iso_i iso_j
* restore the original names. The original used "rename iso_i iso_i" and
* "rename D_country D_country", which relied on variable abbreviation to pick
* up iso_i2 / D_country2; the source variables are now named explicitly.
rename iso_i2 iso_i
rename D_country A_country
rename D_country2 D_country



* weighted average across departure ports by exporting country and arrival port

foreach var of varlist steu* XP tt hop {
    replace `var'=`var'*OUT_D
}

collapse (sum) XP tt hop steu* OUT_D , by(iso_i A_* OUT_A iso_j)

* divide by the summed departure weight, then pre-multiply by the arrival weight
foreach var of varlist steu* tt hop XP {
    replace `var'=`var'/OUT_D*OUT_A
}

* gen weighted average across ports of the importing country

collapse (sum) XP tt hop steu* OUT_A, by(iso_j iso_i)

foreach var of varlist steu* XP tt hop {
    replace `var'=`var'/OUT_A
}

drop OUT_A


save "$data/PESPbyHr_bil514_baci", replace






************************************************************
********* share of world trade exposed to PC ***************
* and
************************************************************
********* weighted Pan exposure over export markets

use "$data/summNetwork", clear
merge 1:1 iso_i iso_j using "$data/PESPbyHr_bil514_baci", keep(master match) nogenerate

* country pairs without an exposure value count as zero exposure
replace XP = 0 if XP == .
********** number of export partners involving PC passage

count if XP>0 & XP!=.
display r(N)/_N

distinct iso_i if XP>0 & XP!=.
distinct iso_i
distinct iso_j if XP>0 & XP!=.
distinct iso_j


summarize XP, detail

* sEXP already exists in summNetwork with a different meaning; rebuild it here
capture drop sEXP
egen EXPxp = total(v*XP)
generate sEXP = EXPxp/tEXP
** percent of global trade passing PC
summarize sEXP

** value of trade passing PC
summarize EXPxp
*

* share in total trade through PC
egen cEXPxp = total(v*XP), by(iso_i)
generate scEXP = cEXPxp/EXPxp

* share in total total
egen cEXPall = total(v), by(iso_i)
generate scEXPall = cEXPall/tEXP


* one row per exporter, largest share of PC trade first
collapse (first) scEXP*, by(iso_i)
gsort -scEXP


** share of trade passing PC accounted for 
list in 1/20

* the top 15 exporters are stored for the ranking table built in the next section
generate rank = _n
keep if rank < 16
save "$data/helpmer", replace




************ country-level summary table on PC exposure 

* Importer-side counterpart of the exporter ranking; the two top-15 lists are
* joined on rank and written out as a .tex and a .csv table.

use "$data/summNetwork", clear

merge 1:1 iso_i iso_j using "$data/PESPbyHr_bil514_baci", keep(master match) nogenerate

replace XP = 0 if XP == .
capture drop sEXP
egen EXPxp = total(v*XP)
generate sEXP = EXPxp/tEXP
** percent of global trade passing PC
summarize sEXP
* share in total trade through PC
egen cEXPxp = total(v*XP), by(iso_j)
generate scIMP = cEXPxp/EXPxp
* share in total total
egen cEXPall = total(v), by(iso_j)
generate scIMPall = cEXPall/tEXP
collapse (first) scIMP*, by(iso_j)
gsort -scIMP


** share of trade passing PC accounted for 
list in 1/20
generate rank = _n
keep if rank < 16
* bring in the exporter ranking saved as helpmer
merge 1:1 rank using "$data/helpmer", nogenerate

* express all shares in percent with one decimal
foreach var of varlist sc* {
    replace `var' = `var'*100
}
format sc* %9.1f

listtex rank iso_j scIMP scIMPa iso_i scEXP* using "$tables/rankPCtrade.tex", replace end(\\)
export delimited rank iso_j scIMP scIMPa iso_i scEXP* using "$tables/rankPCtrade.csv", replace  datafmt 




* Share of each importer's trade value that passes the PC, one row per iso_j,
* saved as input for the world map.
u "$data/summNetwork", clear

merge 1:1 iso_i iso_j using "$data/PESPbyHr_bil514_baci"
drop if _merge==2
drop _merge

* country pairs without an exposure value count as zero exposure
replace XP=0 if XP==.

drop tEXP EXPconnect sEXP

egen timp = total(v), by(iso_j)
egen impXP = total(v*XP) , by(iso_j)

gen sIMP=impXP/timp

* sIMP is constant within iso_j, so any one row per importer will do. The
* original ran "gsort iso_j -sIMP" first, but the bysort below re-sorts by
* iso_j alone, so that ordering had no effect and is omitted.
bysort iso_j: g n=_n

gsort -sIMP
list iso_j sIMP if n==1 & sIMP!=.

keep if n==1
keep iso_j sIMP

*# countries who import more than 10% through PC
* (the original issued this identical count twice; once is enough)

count if sIMP>.1 & sIMP!=.

* average share of imports passing PC, across countries
summ sIMP, detail

save "$data/sIMP4map", replace



**** make world maps

* The map files are read from the folder held in the global SpaceMaps, which is
* not defined anywhere in this do-file. Stop with a clear message instead of
* failing on a path that silently collapses to "/wlddata".
if `"$SpaceMaps"' == "" {
    display as error "global SpaceMaps is not set: point it at the folder holding wlddata.dta and wldcoor.dta"
    exit 198
}

u "$SpaceMaps/wlddata", clear
*rename ISO2 D_country
rename ISO3 iso_j
* recode map ISO3 codes to the aggregate codes used on the trade side
replace iso_j="YUG" if iso_j=="SRB" | iso_j=="MNE"
replace iso_j="BLX" if iso_j=="BEL" | iso_j=="LUX"
* NOTE(review): "ZAR" is not a member code of the group the other five belong to;
* in some map files it denotes DR Congo. Confirm it is meant to be recoded to SAC.
replace iso_j="SAC" if iso_j=="ZAR" | iso_j=="SWZ" | iso_j=="NAM" | iso_j=="LSO" | iso_j=="BWA" | iso_j=="ZAF"
merge m:1 iso_j using "$data/sIMP4map"
*only 4 islands cannot be merged :-) from the ports data
*all landlocked countries are not found in the ports data
drop if _merge==2
drop _merge

spmap sIMP using "$SpaceMaps/wldcoor.dta", id(id) clmethod(custom) clb("0 .05 .1 .2 .3 .4 .5") fcolor(Blues)  legtitle("share of imports passing PC")
graph export "$figures/map_sIMP.png", replace














************* how stable is the Panama exposure measure ? **********************



* how stable is the PanExposure measure
* using the most frequent route

* The path must be quoted: $data expands to a directory containing a space
* ("Ais project"), so the unquoted form used originally cannot be parsed.
u "$data/hrs_12_small", clear
sort post DEP ARR n hop 
* total of the leg-level exposure per route and period, recoded to 0/1
collapse (sum) XP, by(post DEP ARR n)
replace XP=1 if XP==3
replace XP=0 if XP==2
* mean exposure per port pair and period, pre and post side by side
collapse (mean) XP, by(DEP ARR post)
reshape wide XP, i(DEP ARR) j(post)

pwcorr XP*
*95% percent correlation
* keep only port pairs observed in both periods
drop if XPanama0==. | XPanama1==.
count if XPanama0==XPanama1
di r(N)/_N
*95 percent observe no change at all
count if XPanama0==0 & XPanama1>0
di r(N)/_N
*1%
count if XPanama1==0 & XPanama0>0
di r(N)/_N
*1.4%


* shares of pairs whose exposure rises / falls
count if XPanama1>XPanama0
di r(N)/_N
count if XPanama1<XPanama0
di r(N)/_N

* size of the change, by direction and overall
g diff=XPanama1-XPanama0
summ diff if XPanama1>XPanama0, detail
summ diff if XPanama1<XPanama0, detail
summ diff if XPanama1!=XPanama0, detail



* Exposure over all shortest routes of a port pair (not only the most frequent one).
* Both file references below must be quoted: $data expands to a directory
* containing a space ("Ais project"), so the unquoted originals cannot be parsed.
u "$data/fullhrs12", clear
g tt=A_t-D0_t
drop A_tim D0_t
reshape long D@_acid, i(DEP ARR tt n) j(hop)


** drop superfluous hops

sort DEP ARR n hop
bysort DEP ARR  n: drop if D_acid==D_acid[_n-1] & D_acid[_n-1]!=.

* arrival port of a leg = departure port on the next row of the same route
bysort DEP ARR  n: g A_acid = D_acid[_n+1]
drop if A_acid==.
*drop if hop==0
compress
*save temp, replace

*u temp, clear

merge m:1 D_acid A_acid using "$data/XPanama_ids_small"
drop if _merge==2
drop _merge

* total of the leg-level exposure per route, recoded to 0/1
collapse (sum) XP, by(DEP ARR n tt) fast
summ XP
replace XP=1 if XP==3
replace XP=0 if XP==2

* mXP = mean exposure over all routes of a port pair; the counts use one row per pair (n==1)
egen mXP=mean(XP), by(DEP ARR)
count if n==1
count if mXP==1 & n==1
count if mXP>0 & mXP<1 & n==1
count if mXP==0 & n==1
count if mXP>.1 & mXP<.9 & n==1

count if (mXP==1 | mXP==0) & n==1
global c = r(N)
count if n==1
global d = r(N)
di $c/$d

*for 85% of all port-to-port-connections, the PC exposure measure is zero or one for any shortest route

* for pairs that have both, how much longer does it take not to use the canal

keep if mXP!=1 & mXP!=0

collapse (mean) tt, by(DEP ARR XP)

reshape wide tt, i(DEP ARR) j(XP)

* tt0 / tt1 = mean travel time of the pair's routes with exposure 0 / 1
g diff=tt0-tt1
summ diff, detail

g pdiff=(tt0-tt1)/tt0
summ pdiff, detail

* this is surprising !

* How does the average exposure of a port-pair change pre and post?



use "$data/fullhrs12", clear
compress
reshape long D@_acid, i(DEP ARR *time n) j(hop)


** drop superfluous hops

sort DEP ARR n hop
bysort DEP ARR n (hop): drop if D_acid == D_acid[_n-1] & D_acid[_n-1] != .


bysort DEP ARR n (hop): generate A_acid = D_acid[_n+1]
* NOTE(review): this section drops the hop==0 row and keeps rows with a missing
* A_acid, whereas the other route sections in this file do the opposite
* (drop if A_acid==.). Confirm that omitting the first leg is intended.
*drop if A_acid==.
drop if hop == 0

* intermediate result written to the working directory
save temp2, replace


use temp2, clear
merge m:1 D_acid A_acid using "$data/XPanama_ids_small", keep(master match) nogenerate

* post = 0 when A_time is below 24*184, 1 otherwise (including a missing A_time)
generate post = cond(A_time < 24*184, 0, 1)

generate tt = A_time - D0_t
* total of the leg-level exposure per route and period, recoded to 0/1
collapse (sum) XP (mean) tt, by(DEP ARR n post) fast
summarize XP
replace XP = 1 if XP == 3
replace XP = 0 if XP == 2

* mean over all routes of a port pair, pre and post side by side
collapse (mean) XP tt, by(DEP ARR post) fast

reshape wide XP tt, i(DEP ARR) j(post)

save temp3, replace

use temp3, clear
pwcorr XP*
*correlation between pre and post mean exposure over all the shortest routes at the port-to-port level is .98 !

generate diff = XPanama1 - XPanama0
count if diff == .
global g=r(N)
display $g/_N
* 2.5% of ptp connections exist only pre or only post

drop if diff == .
count if diff == 0
global a = r(N)
display $a/_N
*85% have no change in exposure

summarize diff if diff != 0, detail
*the average change in exposure is +.02
generate Diff = abs(diff)
summarize Diff if Diff != 0
*the average absolute change is .12


count if XPanama0==0 & XPanama1>0
display r(N)/_N
*3770 (1.4%) go from zero to something
count if XPanama0==0 & XPanama1==1 
*79
count if XPanama1==0 & XPanama0>0 
display r(N)/_N
*4804 (1.5%)
count if XPanama1==0 & XPanama0==1
*33




/*




******************************************************************************* 
****** how did travel times change on routes ********************
******************************************************************************* 


** port-to-port level, pre vs post


u $data/fullhrs12, clear
g tt=A_t-D0_t
drop D0_t
reshape long D@_acid, i(DEP ARR tt n A_tim) j(hop)


** drop superfluous hops
sort DEP ARR n hop
bysort DEP ARR  n: drop if D_acid==D_acid[_n-1] & D_acid[_n-1]!=.
bysort DEP ARR  n: g A_acid = D_acid[_n+1]
drop if A_acid==.
*drop if hop==0
compress

merge m:1 D_acid A_acid using $data/XPanama_ids_small
drop if _merge==2
drop _merge


g byte post =0
replace post=1 if A_t>184*24

collapse (sum) XP (mean) avtt=tt (median) medtt=tt, by(DEP ARR post) fast

g xp=1 if XP>0
replace xp=0 if xp==.
drop XP 

reshape wide avtt medtt xp, i(DEP ARR) j(post)


g dav = avtt1-avtt0
g dav_pc=dav/avtt0*100
g dmed = medtt1-medtt0
g dmed_pc = dmed/medtt0*100

count if dav==.
di r(N)/_N
* 3 % of pairs do not have connection both pre and post

save $data/SummStats_tt_pairs, replace

u $data/SummStats_tt_pairs, clear


summ dav_pc, detail
summ dav, detail

summ dmed_pc, detail
summ dmed, detail

summ dav_pc if xp0==1 | xp1==1, detail
summ dav if xp0==1 | xp1==1, detail

summ dmed_pc if xp0==1 | xp1==1, detail
summ dmed if xp0==1 | xp1==1, detail




******************************************************************
*********** travel times by routes
******************************************************************



u $data/fullhrs12, clear
g tt=A_t-D0_t
drop D0_t


*make route indicator

bysort *acid: g R=_n
replace R=0 if R>1
replace R=sum(R)


g byte post =0
replace post=1 if A_t>184*24

collapse (mean) avtt=tt (median) medtt=tt, by(DEP ARR *acid post R) fast



reshape long D@_acid, i(DEP ARR post R avtt medtt) j(hop)


** drop superfluous hops
sort post DEP ARR R hop
bysort post DEP ARR  R: drop if D_acid==D_acid[_n-1] & D_acid[_n-1]!=.
bysort post DEP ARR  R: g A_acid = D_acid[_n+1]
drop if A_acid==.
drop if hop==0
compress

merge m:1 D_acid A_acid using $data/XPanama_ids_small
drop if _merge==2
drop _merge


egen Xp=sum(XP), by(R post)
replace Xp=0 if Xp==2
replace Xp=1 if Xp==3

save $data/temp, replace

bysort R post: g n=_n
keep if n==1

drop *acid hop XP 

reshape wide avtt medtt Xp, i(DEP ARR R) j(post)


g dav = avtt1-avtt0
g dav_pc=dav/avtt0*100
g dmed = medtt1-medtt0
g dmed_pc = dmed/medtt0*100

count if dav==.
di r(N)/_N
*only 13 % of all routes exist both pre and post!

summ dav_pc, detail
summ dav, detail

summ dav_pc if Xp0>0 | Xp1>0, detail
summ dav if Xp0>0 | Xp1>0, detail


save $data/SummStats_tt_routes, replace









************************************************************************************************
********** direct travel times ************************************************
************************************************************************************************


u $data/port_to_port_ship_bal_clus_small, clear
keep if Restr==1
g travel_hs=hours(A_date-D_date)
drop if travel_hs==0
g A_day=dofc(A_date)-20453


g byte post =0
replace post=1 if A_day>184
collapse (mean) avtt=trav (median) medtt=trav (count) freq=trav, by(A_acid D_acid post)
reshape wide avtt medtt freq, i(A_ac D_ac) j(post)

merge 1:1 *acid using $data/XPanama_ids_small
drop if _merge==2
drop _merge

g dav = avtt1-avtt0
g dav_pc=dav/avtt0*100
g dmed = medtt1-medtt0
g dmed_pc = dmed/medtt0*100
g dfreq = freq1-freq0

sum dfreq, detail

sum dav, detail
sum dav_pc, detail

sum dmed if freq1>2 & freq0>2, detail
sum dmed_pc, detail



sum dav if freq1>2 & freq0>2, detail
sum dav_pc if freq1>2 & freq0>2, detail

sum dav if freq1>2 & freq0>2 & XP==1, detail
sum dav_pc if freq1>2 & freq0>2 & XP==1, detail


*/

*****************************************************************************************************************************
************************* make a dataset with routes to check overlap with Chinese data *************************
*****************************************************************************************************************************


**** based on the whole of 2016 rather than the pre-period used, e.g. for the network figures


* For every DEP-ARR pair keep the most frequently observed combination(s) of
* the *acid variables and store them in long form, one row per leg.

use "$data/fullhrs12", clear
drop *time
compress
* N = number of observations sharing the same *acid combination
bysort *acid: generate N = _N

collapse (mean) N, by(*acid DEP ARR)

egen mx = max(N), by(DEP ARR)
keep if N == mx

* n numbers the retained combinations within a port pair
bysort DEP ARR: generate n = _n
reshape long D@_acid, i(DEP ARR N n) j(hop)


** drop superfluous hops

sort DEP ARR n hop
bysort DEP ARR n (hop): drop if D_acid == D_acid[_n-1] & D_acid[_n-1] != .

* arrival port of a leg = departure port on the next row of the same route
bysort DEP ARR n (hop): generate A_acid = D_acid[_n+1]
drop if A_acid == .
*drop if hop==0

sort DEP ARR n hop

save "$data/fullhrs12_long", replace


/*
u $data/fullhrs12_long, clear

rename A_ AA
rename ARR A_acid

merge m:1 A_acid using $data/port_id_list_balanced_small
drop _merge
rename A_* ARR_*

rename DEP A_acid
merge m:1 A_acid using $data/port_id_list_balanced_small
drop _merge
rename A_* DEP_*

keep if ARR_coun=="CN" | DEP_count=="CN"

rename D_a A_acid
merge m:1 A_acid using $data/port_id_list_balanced_small
drop if _merge==2
drop _merge
rename A_* D_*

rename AA A_acid
merge m:1 A_acid using $data/port_id_list_balanced_small
drop if _merge==2
drop _merge

drop mx

drop if ARR_coun=="CN" & DEP_count=="CN"

sort DEP_a ARR_a n hop

save $data/routes_CN, replace
*/










****** how sparse is the network ?

* Compares the number of observed directed links with the number of possible
* ordered pairs, first for ports and then for countries.

use "$data/port_to_port_ship_bal_clus_small", clear 
drop A_port_id D_port_id
drop if A_acid == D_acid

* the largest A_acid value is used as the number of ports
* NOTE(review): this presumes the ids run 1..P without gaps -- confirm
summarize A_acid
global Aid = r(max)
global pairs = r(max)*(r(max)-1)
display "$pairs"
summarize D_acid
global Did = r(max)


* observed port-to-port links = number of distinct (D_acid, A_acid) groups
egen pid = group(D_acid A_acid)
summarize pid
global links = r(max)

display $links/$pairs


* same exercise at the country level, ignoring within-country links
egen cid = group(*country) if A_co != D_co
summarize cid
global clinks = r(max)

egen Ac = group(A_co)
summarize Ac
global cpairs=r(max)*(r(max)-1)

display $clinks/$cpairs


** we have 1375 country-to-country connections

/*

** how sparse is the network in terms of country pairs that actually trade ?
u "$baci/baci2016_bil", clear
** we actually have more trading country pairs than pairs with container ports (bc of the landlocked countries) 
count



use "/Users/inga/Documents/Projekte/Shipping/Comtrade/CountryPairLevel/comtrade_bil_0019.dta", clear
keep if year==2016
keep if tradeflowcode==1 | tradeflowcode==3
rename reporteriso iso_j
rename partneriso iso_i
keep iso*
save temp, replace

use "/Users/inga/Documents/Projekte/Shipping/Comtrade/CountryPairLevel/comtrade_bil_0019.dta", clear
keep if year==2016
keep if tradeflowcode==2 | tradeflowcode==4
rename reporteriso iso_i
rename partneriso iso_j
keep iso*
append using temp

duplicates drop

save "/Users/inga/Documents/Projekte/Shipping/Comtrade/CountryPairLevel/countrypairs_trading_2016", replace


u "$data/port_to_port_ship_bal_clus_small", clear 

collapse (first) ship_id, by(A_country)

drop if A_c==""
keep A_co
save temp, replace
rename A_c D_country
cross using temp
rename D_country iso2
merge m:1 iso2 using $data/iso2toiso3_uqe, keepusing(iso3)
drop if _merge==2
drop _merge
rename iso3 iso_i
drop iso2
rename A_c iso2
merge m:1 iso2 using $data/iso2toiso3_uqe, keepusing(iso3)
drop if _merge==2
drop _merge
rename iso3 iso_j
drop iso2

duplicates tag iso*, g(tag)
drop if iso_i=="" | iso_j==""

merge 1:1 iso_i iso_j using "/Users/inga/Documents/Projekte/Shipping/Comtrade/CountryPairLevel/countrypairs_trading_2016"
drop if _merge==1

count 
count if _merg==3
di 1375/r(N)

// .08% of the countrypairs with positive trade


*/

****************************************************************************************
***************** how much of the comtrade data do we have in the sample ? **********************
****************************************************************************************

/*

import delim using "$rawdata/comtrade/comtrade20132017.csv", clear
tab reporteriso
collapse (sum) tradeval, by(year)
save temp, replace

import delim using "$rawdata/comtrade/comtrade20182019.csv", clear
collapse (sum) tradeval, by(year)
append using temp

save $data/comtrade_wld201319, replace


u $data/qregdata, clear
eststo main: reghdfe lnv postXP postNEOP_p  postxlnd  postxcont postxcoml  fta, absorb(cid_i#cid_j cid_i#perid cid_j#perid) cluster(cid_i cid_j) noconst
keep if _est_main==1
collapse (sum) value, by(year)

merge 1:1 year using $data/comtrade_wld201319
drop _mer

collapse (sum) val tra

g s=value/tradeval
list s 




******************************************************************************************
****************** stats on the algorithm: last run **************************************


import delim algorithm/StatsLastJob514ports.txt, clear rowrange(4) delim(" ", collapse)
keep if substr(v1,-1,1)=="+"
drop if v4=="0"
drop format
g unit = substr(v4,-1,1)
tab unit
g MaxRss= substr(v4,1,5)
destring MaxRss, replace
replace MaxRss=MaxRss*1000 if unit=="M"
replace unit = "K" if unit=="M"

g double dt=clock(sacct, "hms")


collapse (mean) adt=dt aMRss=MaxRss (max) mxdt=dt mxRss=MaxRss (sum) sdt=dt

g Adt=string(adt, "%tc")
g Mdt=string(mxdt, "%tc")
g Sdt=string(sdt, "%tc")
g SDT=hours(sdt)





*/


